#### Cycles of Silence: Police-Citizen Cooperation in Communities with Criminal Groups ####
#### Andrew Cesare Miller ####

#### DATA & PACKAGES ####

#### + Load Packages ####

library(Hmisc)
library(MASS)
library(erer)
library(sandwich)
library(dplyr)
library(plyr)
library(reshape2)
library(emmeans)
library(effects)
library(reporttools)
library(ggplot2)
library(scales)
library(ggrepel)
library(ggthemes)
library(stargazer)

#### + Load Data ####

### Set working directory to script location
setwd(dirname(rstudioapi::getSourceEditorContext()$path))

### Load survey
load("cycles_replication.RData")
# N.B. The data are not posted publicly to preserve respondent anonymity.

### Simplify dataset object names
s <- survey_data
mc <- market_census

#### ARTICLE CONTENTS ####

#### + Table 1: Retaliation Risk, Cooperation Norms, and Information-sharing ####

### Create vectors for regression coefficients

covs.main <- "retaliation10+coop.others10+"
covs.eth.regFX <- "female+age+education+employees.paid+relig.christ+ethnicity.yoruba+ineffectiveness+mistreatment+market.agege+market.bariga+market.mushin"

### Create cooperation variable with character values 

s$Cooperation_Will <- s$coop.will
s$Cooperation_Will <- replace(s$Cooperation_Will, s$Cooperation_Will==0, "None")
s$Cooperation_Will <- replace(s$Cooperation_Will, s$Cooperation_Will==1, "A little")
s$Cooperation_Will <- replace(s$Cooperation_Will, s$Cooperation_Will==2, "Some")
s$Cooperation_Will <- replace(s$Cooperation_Will, s$Cooperation_Will==3, "Everything")
s$Cooperation_Will <- factor(s$Cooperation_Will, levels=rev(c("Everything","Some","A little","None")))

### Run regression with amount of information as outcome

m1 <- polr(paste("Cooperation_Will ~", paste0(covs.main,covs.eth.regFX)), 
           method="probit", data = s, Hess=TRUE); summary(m1)

### Calculate marginal effects of the mean (MEM)

m1me <- ocME(m1)
tab.me <- m1me$out$ME.all[,rev(colnames(m1me$out$ME.all))]

### Calculate baseline cooperation

# Perceived retaliation likelihood
newdat <- with(s,data.frame(
  retaliation10=seq(0,10,.1),
  coop.others10=mean(coop.others10,na.rm=T), 
  female=mean(female,na.rm=T),
  age=mean(age,na.rm=T),
  education=mean(education,na.rm=T),
  employees.paid=mean(employees.paid,na.rm=T),
  relig.christ=mean(relig.christ,na.rm=T),
  ethnicity.yoruba=mean(ethnicity.yoruba,na.rm=T),
  mistreatment=mean(mistreatment,na.rm=T),
  ineffectiveness=mean(ineffectiveness,na.rm=T),
  market.agege=mean(market.agege),
  market.bariga=mean(market.bariga),
  market.mushin=mean(market.mushin)))
newdat <- cbind(newdat, predict(m1, newdat, type = "probs"))
lnewdat <- melt(newdat, id.vars = c("retaliation10"),
                variable.name = "Level", value.name="Probability")
lnewdat$retaliation10 <- lnewdat$retaliation10/10
lnewdat <- subset(lnewdat,Level=="None" | Level=="A little" | Level=="Some" | Level=="Everything")
base.retal <- rev(subset(lnewdat,retaliation10==0)$Probability)

# Perceived cooperation norms
newdat <- with(s,data.frame(
  retaliation10=mean(retaliation10,na.rm=T),
  coop.others10=seq(0,10,.1), 
  female=mean(female,na.rm=T),
  age=mean(age,na.rm=T),
  education=mean(education,na.rm=T),
  employees.paid=mean(employees.paid,na.rm=T),
  relig.christ=mean(relig.christ,na.rm=T),
  ethnicity.yoruba=mean(ethnicity.yoruba,na.rm=T),
  mistreatment=mean(mistreatment,na.rm=T),
  ineffectiveness=mean(ineffectiveness,na.rm=T),
  market.agege=mean(market.agege),
  market.bariga=mean(market.bariga),
  market.mushin=mean(market.mushin)))
newdat <- cbind(newdat, predict(m1, newdat, type = "probs"))
lnewdat <- melt(newdat, id.vars = c("coop.others10"),
                variable.name = "Level", value.name="Probability")
lnewdat <- subset(lnewdat, Level=="None" | Level=="A little" | Level=="Some" | Level=="Everything")
lnewdat$coop.others10 <- lnewdat$coop.others10/10
base.coop <- rev(subset(lnewdat,coop.others10==0)$Probability)

### Calculate percent changes

# Perceived retaliation likelihood
per.retal <-  rev(m1me$out$ME.all[1,]) / base.retal

# Perceived cooperation norms
per.coop <- rev(m1me$out$ME.all[2,]) / base.coop

### Extract coefficients and standard errors

coef.se <- data.frame(rbind(t(m1me$out$ME.Everything[1:2,1:2]),
      t(m1me$out$ME.Some[1:2,1:2]),
      t(m1me$out$`ME.A little`[1:2,1:2]),
      t(m1me$out$ME.None[1:2,1:2])))
coef.se$level <- c("Everything","Everything",
                   "Some","Some",
                   "A little","A little",
                   "None","None")
coef.retal <- coef.se[c(1,3,5,7),"retaliation10"]
se.retal <- coef.se[c(2,4,6,8),"retaliation10"]
coef.coop <- coef.se[c(1,3,5,7),"coop.others10"]
se.coop <- coef.se[c(2,4,6,8),"coop.others10"]

### Create table

tab1 <- rbind(per.retal,coef.retal,se.retal,base.retal,
   per.coop,coef.coop,se.coop,base.coop)
row.names(tab1) <- c("Retaliation",NA,NA,NA,"Cooperation Norms",NA,NA,NA)
colnames(tab1) <- c("Everything","Some","A little","None")
tab1 <- as.data.frame(tab1)
tab1$label <- c("% Change","MEM","SE","Baseline",
                "% Change","MEM","SE","Baseline")
tab1$Everything <- round(tab1$Everything,3)
tab1$Some <- round(tab1$Some, 3)
tab1$`A little` <- round(tab1$`A little`,3)
tab1$None <- round(tab1$None, 3)
tab1 <- tab1[,c(5,1:4)]
print(tab1)

#### + Figure 1: Differences in Predicted Information-sharing ####

### Calculate first differences

# Perceived retaliation likelihood
newdat <- with(s,data.frame(
  retaliation10=c(0,10),
  coop.others10=mean(coop.others10,na.rm=T), 
  female=mean(female,na.rm=T),
  age=mean(age,na.rm=T),
  education=mean(education,na.rm=T),
  employees.paid=mean(employees.paid,na.rm=T),
  relig.christ=mean(relig.christ,na.rm=T),
  ethnicity.yoruba=mean(ethnicity.yoruba,na.rm=T),
  mistreatment=mean(mistreatment,na.rm=T),
  ineffectiveness=mean(ineffectiveness,na.rm=T),
  market.agege=mean(market.agege),
  market.bariga=mean(market.bariga),
  market.mushin=mean(market.mushin)))
probs <- marginaleffects::predictions(m1,newdata = newdat, type = "probs")
Prob_difference1 <- probs[8,3] - probs[7,3]
SE_difference1 <- sqrt(probs[8,4]^2 + probs[7,4]^2)

# Perceived cooperation Norms
newdat <- with(s,data.frame(
  retaliation10=mean(retaliation10,na.rm=T),
  coop.others10=c(0,10), 
  female=mean(female,na.rm=T),
  age=mean(age,na.rm=T),
  education=mean(education,na.rm=T),
  employees.paid=mean(employees.paid,na.rm=T),
  relig.christ=mean(relig.christ,na.rm=T),
  ethnicity.yoruba=mean(ethnicity.yoruba,na.rm=T),
  mistreatment=mean(mistreatment,na.rm=T),
  ineffectiveness=mean(ineffectiveness,na.rm=T),
  market.agege=mean(market.agege),
  market.bariga=mean(market.bariga),
  market.mushin=mean(market.mushin)))
newdat.pred <- predict(m1, newdat, type = "probs", se.fit=TRUE)
probs <- marginaleffects::predictions(m1,newdata = newdat, type = "probs")
Prob_difference2 <- probs[8,3] - probs[7,3]
SE_difference2 <- sqrt(probs[8,4]^2 + probs[7,4]^2)

# Female
newdat <- with(s,data.frame(
  retaliation10=mean(retaliation10,na.rm=T),
  coop.others10=mean(coop.others10,na.rm=T), 
  female=c(0,1),
  age=mean(age,na.rm=T),
  education=mean(education,na.rm=T),
  employees.paid=mean(employees.paid,na.rm=T),
  relig.christ=mean(relig.christ,na.rm=T),
  ethnicity.yoruba=mean(ethnicity.yoruba,na.rm=T),
  mistreatment=mean(mistreatment,na.rm=T),
  ineffectiveness=mean(ineffectiveness,na.rm=T),
  market.agege=mean(market.agege),
  market.bariga=mean(market.bariga),
  market.mushin=mean(market.mushin)))
probs <- marginaleffects::predictions(m1,newdata = newdat, type = "probs")
Prob_difference3 <- probs[8,3] - probs[7,3]
SE_difference3 <- sqrt(probs[8,4]^2 + probs[7,4]^2)

# Age
newdat <- with(s,data.frame(
  retaliation10=mean(retaliation10,na.rm=T),
  coop.others10=mean(coop.others10,na.rm=T), 
  female=mean(female,na.rm=T),
  age=c(min(age,na.rm=T),max(age,na.rm=T)),
  education=mean(education,na.rm=T),
  employees.paid=mean(employees.paid,na.rm=T),
  relig.christ=mean(relig.christ,na.rm=T),
  ethnicity.yoruba=mean(ethnicity.yoruba,na.rm=T),
  mistreatment=mean(mistreatment,na.rm=T),
  ineffectiveness=mean(ineffectiveness,na.rm=T),
  market.agege=mean(market.agege),
  market.bariga=mean(market.bariga),
  market.mushin=mean(market.mushin)))
probs <- marginaleffects::predictions(m1,newdata = newdat, type = "probs")
Prob_difference4 <- probs[8,3] - probs[7,3]
SE_difference4 <- sqrt(probs[8,4]^2 + probs[7,4]^2)

# Education
newdat <- with(s,data.frame(
  retaliation10=mean(retaliation10,na.rm=T),
  coop.others10=mean(coop.others10,na.rm=T), 
  female=mean(female,na.rm=T),
  age=mean(age,na.rm=T),
  education=c(min(education,na.rm=T),max(education,na.rm=T)),
  employees.paid=mean(employees.paid,na.rm=T),
  relig.christ=mean(relig.christ,na.rm=T),
  ethnicity.yoruba=mean(ethnicity.yoruba,na.rm=T),
  mistreatment=mean(mistreatment,na.rm=T),
  ineffectiveness=mean(ineffectiveness,na.rm=T),
  market.agege=mean(market.agege),
  market.bariga=mean(market.bariga),
  market.mushin=mean(market.mushin)))
probs <- marginaleffects::predictions(m1,newdata = newdat, type = "probs")
Prob_difference5 <- probs[8,3] - probs[7,3]
SE_difference5 <- sqrt(probs[8,4]^2 + probs[7,4]^2)

# Number of Employees
newdat <- with(s,data.frame(
  retaliation10=mean(retaliation10,na.rm=T),
  coop.others10=mean(coop.others10,na.rm=T), 
  female=mean(female,na.rm=T),
  age=mean(age,na.rm=T),
  education=mean(education,na.rm=T),
  employees.paid=c(min(employees.paid,na.rm=T),max(employees.paid,na.rm=T)),
  relig.christ=mean(relig.christ,na.rm=T),
  ethnicity.yoruba=mean(ethnicity.yoruba,na.rm=T),
  mistreatment=mean(mistreatment,na.rm=T),
  ineffectiveness=mean(ineffectiveness,na.rm=T),
  market.agege=mean(market.agege),
  market.bariga=mean(market.bariga),
  market.mushin=mean(market.mushin)))
probs <- marginaleffects::predictions(m1,newdata = newdat, type = "probs")
Prob_difference6 <- probs[8,3] - probs[7,3]
SE_difference6 <- sqrt(probs[8,4]^2 + probs[7,4]^2)

# Religion
newdat <- with(s,data.frame(
  retaliation10=mean(retaliation10,na.rm=T),
  coop.others10=mean(coop.others10,na.rm=T), 
  female=mean(female,na.rm=T),
  age=mean(age,na.rm=T),
  education=mean(education,na.rm=T),
  employees.paid=mean(employees.paid,na.rm=T),
  relig.christ=c(0,1),
  ethnicity.yoruba=mean(ethnicity.yoruba,na.rm=T),
  mistreatment=mean(mistreatment,na.rm=T),
  ineffectiveness=mean(ineffectiveness,na.rm=T),
  market.agege=mean(market.agege),
  market.bariga=mean(market.bariga),
  market.mushin=mean(market.mushin)))
probs <- marginaleffects::predictions(m1,newdata = newdat, type = "probs")
Prob_difference7 <- probs[8,3] - probs[7,3]
SE_difference7 <- sqrt(probs[8,4]^2 + probs[7,4]^2)

# Ethnicity
newdat <- with(s,data.frame(
  retaliation10=mean(retaliation10,na.rm=T),
  coop.others10=mean(coop.others10,na.rm=T), 
  female=mean(female,na.rm=T),
  age=mean(age,na.rm=T),
  education=mean(education,na.rm=T),
  employees.paid=mean(employees.paid,na.rm=T),
  relig.christ=mean(relig.christ,na.rm=T),
  ethnicity.yoruba=c(0,1),
  mistreatment=mean(mistreatment,na.rm=T),
  ineffectiveness=mean(ineffectiveness,na.rm=T),
  market.agege=mean(market.agege),
  market.bariga=mean(market.bariga),
  market.mushin=mean(market.mushin)))
probs <- marginaleffects::predictions(m1,newdata = newdat, type = "probs")
Prob_difference8 <- probs[8,3] - probs[7,3]
SE_difference8 <- sqrt(probs[8,4]^2 + probs[7,4]^2)

# Mistreatment
newdat <- with(s,data.frame(
  retaliation10=mean(retaliation10,na.rm=T),
  coop.others10=mean(coop.others10,na.rm=T), 
  female=mean(female,na.rm=T),
  age=mean(age,na.rm=T),
  education=mean(education,na.rm=T),
  employees.paid=mean(employees.paid,na.rm=T),
  relig.christ=mean(relig.christ,na.rm=T),
  ethnicity.yoruba=mean(ethnicity.yoruba,na.rm=T),
  mistreatment=c(min(mistreatment,na.rm=T),max(mistreatment,na.rm=T)),
  ineffectiveness=mean(ineffectiveness,na.rm=T),
  market.agege=mean(market.agege),
  market.bariga=mean(market.bariga),
  market.mushin=mean(market.mushin)))
probs <- marginaleffects::predictions(m1,newdata = newdat, type = "probs")
Prob_difference9 <- probs[8,3] - probs[7,3]
SE_difference9 <- sqrt(probs[8,4]^2 + probs[7,4]^2)

# Ineffectiveness
newdat <- with(s,data.frame(
  retaliation10=mean(retaliation10,na.rm=T),
  coop.others10=mean(coop.others10,na.rm=T), 
  female=mean(female,na.rm=T),
  age=mean(age,na.rm=T),
  education=mean(education,na.rm=T),
  employees.paid=mean(employees.paid,na.rm=T),
  relig.christ=mean(relig.christ,na.rm=T),
  ethnicity.yoruba=mean(ethnicity.yoruba,na.rm=T),
  mistreatment=mean(mistreatment,na.rm=T),
  ineffectiveness=c(min(ineffectiveness,na.rm=T),max(ineffectiveness,na.rm=T)),
  market.agege=mean(market.agege),
  market.bariga=mean(market.bariga),
  market.mushin=mean(market.mushin)))
probs <- marginaleffects::predictions(m1,newdata = newdat, type = "probs")
Prob_difference10 <- probs[8,3] - probs[7,3]
SE_difference10 <- sqrt(probs[8,4]^2 + probs[7,4]^2)

### Create dataframe

Variable_Names2 <- factor(x=c("Retalliation","Cooperation Norms","Gender","Age","Education","No. of Employees","Religion","Ethnicity","Mistreatment","Ineffectiveness"), # ,"Market: Agege", "Market: Bariga","Market: Mushin"
                          levels=rev(c("Retalliation","Cooperation Norms","Gender","Age","Education","No. of Employees","Religion","Ethnicity","Mistreatment","Ineffectiveness")))
model2Frame <- data.frame(Variable = Variable_Names2,
                          Coefficient = c(Prob_difference1,Prob_difference2,Prob_difference3,
                                          Prob_difference4,Prob_difference5,
                                          Prob_difference6,Prob_difference7,Prob_difference8,
                                          Prob_difference9,Prob_difference10),
                          SE = c(SE_difference1,SE_difference2,SE_difference3,
                                 SE_difference4,SE_difference5,
                                 SE_difference6,SE_difference7,SE_difference8,
                                 SE_difference9,SE_difference10),
                          Color_Code = c("a","a",rep("c",8)),
                          modelName = "Standardized Effects")
model2Frame$Labels <- c(
  "0% to 100% Likelihood",
  "0% to 100% Willing",
  "Male to Female",
  "18-30 to Over 60",
  "Primary or less to Post-graduate",
  "0 to 3+",
  "Non-Christian to Christian",
  "Non-Yoruba to Yoruba",
  "Never to Always Mistreat",
  "Always to Never Arrest"
)

### Create plot

interval1 <- -qnorm((1-0.9)/2)
interval2 <- -qnorm((1-0.95)/2) 
plot1 <- ggplot(model2Frame,aes(color=Color_Code))
plot1 <- plot1 + geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)
plot1 <- plot1 + geom_linerange(aes(x = Variable, ymin = Coefficient - SE*interval1,
                                ymax = Coefficient + SE*interval1),
                            lwd = 1, position = position_dodge(width = 1/2),
                            show.legend = F)
plot1 <- plot1 + geom_pointrange(aes(x = Variable, y = Coefficient, 
                                 ymin = Coefficient - SE*interval2,
                                 ymax = Coefficient + SE*interval2),
                             lwd = 1/2, position = position_dodge(width = 1/2),
                             show.legend = F)
plot1 <- plot1 + scale_y_continuous(limits=c(-.6,.6),
                                breaks=c(-.60, -.40, -.20, 0, .20, .40, .60),
                                label = c("-60pp","-40pp", "-20pp", "0pp","20pp","40pp","60pp"))
plot1 <- plot1 + geom_label(data=model2Frame, aes(x=Variable, y=Coefficient, label = Labels, colour=Color_Code), 
                        nudge_x = 0.325, inherit.aes=T, size=3, # fill="white",alpha=1,
                        na.rm = FALSE,show.legend = F)
plot1 <- plot1 + ylab("Change in Probability of Sharing All Information") + xlab(element_blank())
plot1 <- plot1 + coord_flip() + scale_color_manual(values=c("black","gray")) + theme_bw(); plot1

#### + Figure 2: Predicted Information Loss from Norm Suppression ####

### Predict baseline

newdat <- with(s,data.frame(
  retaliation10=mean(retaliation10,na.rm=T),
  coop.others10=seq(0,10,.1), 
  female=mean(female,na.rm=T),
  age=mean(age,na.rm=T),
  education=mean(education,na.rm=T),
  employees.paid=mean(employees.paid,na.rm=T),
  relig.christ=mean(relig.christ,na.rm=T),
  ethnicity.yoruba=mean(ethnicity.yoruba,na.rm=T),
  mistreatment=mean(mistreatment,na.rm=T),
  ineffectiveness=mean(ineffectiveness,na.rm=T),
  market.agege=mean(market.agege),
  market.bariga=mean(market.bariga),
  market.mushin=mean(market.mushin)))
newdat <- cbind(newdat, predict(m1, newdat, type = "probs"))
newdat$`At least\na little info` <- newdat$`A little` + newdat$Some + newdat$Everything
newdat$`At least\nsome info` <- newdat$Some + newdat$Everything
newdat$`All info` <- newdat$Everything
newdat$`No info` <- newdat$None
lnewdat <- melt(newdat, id.vars = c("coop.others10"),
                variable.name = "Level", value.name="Probability")
lnewdat_coop <- subset(lnewdat, Level=="At least\na little info" | Level=="At least\nsome info" | Level=="All info")
lnewdat_coop$coop.others10 <- lnewdat_coop$coop.others10/10

### Calcuate norm suppression

# Cooperation norms variables as percent
s$coop.others10_per <- s$coop.others10/10

# Standard error function
std <- function(x) sd(x,na.rm=T)/sqrt(sum(!is.na(x)))

# Perceived Average
coop.perceive.avg <- mean(s$coop.others10_per*100,na.rm=T); coop.perceive.avg
coop.perceive.se <- std(s$coop.others10_per*100); coop.perceive.se

# Reported Actual
coop.actual.avg <- mean(s$coop.will.bin*100,na.rm=T); coop.actual.avg
coop.actual.se <- std(s$coop.will.bin*100); coop.actual.se

# Difference btwn Perceived and Actual
coop.bias.avg <- coop.perceive.avg - coop.actual.avg

# Confidence intervals
alpha = 0.05
degrees.freedom = nrow(s) - 1
t.score = qt(p=alpha/2, df=degrees.freedom,lower.tail=F)
margin.error.perceive <- t.score * coop.perceive.se
margin.error.actual <- t.score * coop.actual.se

### Create plot
label.df <- subset(lnewdat_coop,coop.others10==1)
label.mis <- subset(lnewdat_coop,coop.others10==round(coop.actual.avg/100,2)|
                      coop.others10==round(coop.perceive.avg/100,2))
label.mis$Prob.Label <- paste0(round(label.mis$Probability*100,0),"%")
margin<-0.035
plot1 <- ggplot(lnewdat_coop, aes(x = coop.others10, y = Probability)) +
  scale_x_continuous(labels=percent_format(accuracy=1), breaks = seq(0,1,by=.2), limits = c(0,1.125)) +
  scale_y_continuous(labels=percent_format(accuracy=1), breaks = seq(0,1,by=.2), limits = c(0,1.05)) + 
  labs(x="Perceived Proporiton of Community Willing to Cooperate", y="Probability of Sharing Information Amount")+
  geom_line(aes(colour = Level)) + 
  geom_label(data=label.df, aes(x=coop.others10, y=Probability, label = Level,colour=Level), 
             nudge_x = 0.085,inherit.aes=T,size=3,
             na.rm = FALSE)+
  geom_label(data=label.mis, aes(x=coop.others10, y=Probability, label = Prob.Label,colour=Level), 
             nudge_x = 0.065, inherit.aes=T,size=3, #fill="white",alpha=1,
             na.rm = FALSE)+
  annotate("text", x=coop.actual.avg/100-.1, y=1.025, label="Reported\nActual",size=3) +
  annotate("text", x=coop.perceive.avg/100-.1, y=1.025, label="Perceived\nAverage",size=3) +
  geom_vline(aes(xintercept = coop.perceive.avg/100), linetype="dotted",
             lwd=.5,show.legend = F) +
  geom_vline(aes(xintercept = coop.actual.avg/100),linetype="dotted",color="black",
             lwd=.5,show.legend = F) +
  annotate('ribbon', y = c(-Inf, Inf), 
           xmin = (coop.actual.avg-margin.error.actual)/100, 
           xmax = (coop.actual.avg+margin.error.actual)/100, 
           alpha = 0.2, fill = 'black')+
  annotate('ribbon', y = c(-Inf, Inf), 
           xmin = (coop.perceive.avg-margin.error.perceive)/100, 
           xmax = (coop.perceive.avg+margin.error.perceive)/100, 
           alpha = 0.2, fill = 'black')+
  annotate("segment", xend=coop.perceive.avg/100+margin, x=coop.actual.avg/100-margin, 
           y=0.95, yend=0.95, colour = "black", arrow=arrow(length = unit(0.25, "cm"))) +
  theme_few()+theme(legend.position = "none")+scale_colour_grey(start = 0,end = .7); plot1

#### + Figure 3: Intervention Effect on Information-sharing in VR Vignette ####

### Transform cooperation variable for vignette to character vector

s$Coop_Will_Experiment <- s$exp.coop
s$Coop_Will_Experiment <- replace(s$Coop_Will_Experiment, s$Coop_Will_Experiment==0, "Nothing")
s$Coop_Will_Experiment <- replace(s$Coop_Will_Experiment, s$Coop_Will_Experiment==1, "A little")
s$Coop_Will_Experiment <- replace(s$Coop_Will_Experiment, s$Coop_Will_Experiment==2, "Some")
s$Coop_Will_Experiment <- replace(s$Coop_Will_Experiment, s$Coop_Will_Experiment==3, "Everything")
s$Coop_Will_Experiment <- factor(s$Coop_Will_Experiment, levels=rev(c("Everything","Some","A little","Nothing")))

### Create vector for regressions

outcome.probit <- paste0("Coop_Will_Experiment","~")
covs.reg.probit <- "female+age+education+employees.paid+relig.christ+market.agege+market.bariga+market.mushin"
variables.probit <- paste0("treat.anon*treat.social*police.coethnic+", covs.reg.probit)

### Run probit regressions

# All respondents
m1_all <- polr(paste(outcome.probit, variables.probit), 
               method="probit", data = s, Hess=TRUE); summary(m1_all)
ctable <- coef(summary(m1_all))
p <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2 
ctable_all <- cbind(ctable, "p value" = p)

# Yoruba respondents 
m1_y <- polr(paste(outcome.probit, variables.probit), method="probit",data = subset(s,ethnicity.yoruba==1), Hess=TRUE); summary(m1_y)
ctable <- coef(summary(m1_y))
p <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2 
ctable_y <- cbind(ctable, "p value" = p)

# Non-Yoruba respondents
m1_ny <- polr(paste(outcome.probit, variables.probit), method="probit",data = subset(s,ethnicity.yoruba!=1), Hess=TRUE); summary(m1_ny)
ctable <- coef(summary(m1_ny))
p <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2 
ctable_ny <- cbind(ctable, "p value" = p)

### Calculate predicted probabilities 

pred <- function(df_name,model_name, intervention_name, var_name, respondents_name, anon, social,coethnic){
  require(reshape2)
  newdat <- with(df_name,data.frame(
    treat.anon=anon,
    treat.social=social,
    police.coethnic=coethnic,
    female=mean(female),
    age=mean(age,na.rm=T),
    education=mean(education,na.rm=T),
    employees.paid=mean(employees.paid),
    relig.christ=mean(relig.christ),
    market.agege=mean(market.agege),
    market.bariga=mean(market.bariga),
    market.mushin=mean(market.mushin)
  ))
  newdat <- cbind(newdat, predict(model_name, newdat, type = "probs", se.fit=TRUE))
  probs <- marginaleffects::predictions(model_name,newdata = newdat, type = "probs")
  probs <- cbind(rep(c("Treatment","Control"),4),probs[,c(2:4)])
  colnames(probs) <- c("Status","Level","Probability","SE")
  probs$Prob.Label <- paste0(round(probs$Probability*100,0),"%")
  probs$Status <- factor(probs$Status,levels=c("Treatment","Control"))
  probs$Intervention <- intervention_name
  probs$Respondents <- respondents_name
  return(probs)
}
pred_anon_all <- pred(df_name=s,model_name=m1_all,anon=c(1,0),social=0,coethnic=0,var_name="treat.anon",intervention_name="Anonymity",respondents_name="All")
pred_anon_y <- pred(df_name=subset(s,ethnicity.yoruba==1),model_name=m1_y,anon=c(1,0),social=0,coethnic=0,var_name="treat.anon",intervention_name="Anonymity",respondents_name="Yoruba")
pred_anon_ny <- pred(df_name=subset(s,ethnicity.yoruba==0),model_name=m1_ny,anon=c(1,0),social=0,coethnic=0,var_name="treat.anon",intervention_name="Anonymity",respondents_name="Non-Yoruba")
pred_social_all <- pred(df_name=s,model_name=m1_all,anon=0,social=c(1,0),coethnic=0,var_name="treat.social",intervention_name="Awareness",respondents_name="All")
pred_social_y <- pred(df_name=subset(s,ethnicity.yoruba==1),model_name=m1_y,anon=0,social=c(1,0),coethnic=0,var_name="treat.social",intervention_name="Awareness",respondents_name="Yoruba")
pred_social_ny <- pred(df_name=subset(s,ethnicity.yoruba==0),model_name=m1_ny,anon=0,social=c(1,0),coethnic=0,var_name="treat.social",intervention_name="Awareness",respondents_name="Non-Yoruba")

### Create dataframe with predictions

pred_all <- rbind(
  pred_anon_all,pred_anon_y,pred_anon_ny,
  pred_social_all,pred_social_y,pred_social_ny
)
pred_all$Respondents[pred_all$Respondents=="All"] <- "All Respondents"
pred_all$Respondents <- factor(pred_all$Respondents,
                               levels = c("All Respondents","Yoruba","Non-Yoruba"))
pred_all$Level <- factor(pred_all$Level,
                         levels=rev(c("Everything","Some","A little","Nothing")))
pred_all$Intervention <- replace(pred_all$Intervention, pred_all$Intervention=="Anonymity", "Anonymity Intervention")
pred_all$Intervention <- replace(pred_all$Intervention, pred_all$Intervention=="Awareness", "Awareness Intervention")
ct <- data.frame(round(rbind(ctable_all[1:2,1:2],
                             ctable_y[1:2,1:2],
                             ctable_ny[1:2,1:2]),3))
ct <- ct[c(1,3,5,2,4,6),]
names(ct) <- c("value","se")
ct$se <- paste0(paste0("(",ct$se),")")
ct$value <- paste0(ct$value,c("***","","***","*","",""))
ct$coefs <- paste(ct$value,ct$se)
ct$coefs <- paste("Coef:",ct$coefs)
coef.vals <- ct$coefs
labs <- data.frame(
  Status="Treatment",Level="Everything",
  Intervention=c(rep("Anonymity Intervention",3),rep("Awareness Intervention",3)),
  Respondents=rep(c("All Respondents","Yoruba","Non-Yoruba"),2),
  Coefficients=coef.vals
)
pred_all <- merge(pred_all,labs,all.x=T,by=c("Intervention","Respondents","Status","Level"))

### Create plot

interval1 <- -qnorm((1-0.9)/2)  
interval2 <- -qnorm((1-0.95)/2) 
plot1 <- ggplot(pred_all,aes(x=Probability,y=Level,color=Status))+
  geom_point(size=2, position = position_dodge(width = -1/2)) + 
  geom_linerange(aes(y = Level, xmin = Probability - SE*interval2,
                     xmax = Probability + SE*interval2),
                 lwd = 1,
                 position = position_dodge(width = -1/2),
                 show.legend = F)+
  geom_label_repel(data=pred_all, aes(y=Level, x=Probability, label = Prob.Label,color=Status), 
                   inherit.aes=T, size=3, na.rm = FALSE,show.legend = FALSE,
                   position = position_dodge(width = -1/2),
                   segment.color = NA)+
  scale_y_discrete(labels = c("Everything" = "All Info.", "Some" = "Some",
                              "A little" = "A little", "Nothing" = "None"))+
  scale_x_continuous(labels=percent_format(accuracy=1), breaks = seq(0,1,by=.1), limits = c(0,.65)) +
  geom_text(data=pred_all,aes(x=.525,y=.75,,label=Coefficients),size=3)+
  labs(y="Information-sharing Amount", x="Probability of Sharing given Amount of Information") +
  theme_few()+theme(legend.position = "bottom",legend.title = element_blank(),
                    panel.grid.major.y = element_line(colour = "black",size=.2)
  )+scale_colour_grey(start = 0,end = .7)+
  facet_grid(facets = c("Respondents","Intervention")); plot1

#### + Figure 4: Anonymity Effect on Perceived Retaliation Risk in VR Vignette ####

### Transform risk outcome to character vector

s$Risk_Experiment <- s$exp.risk
s$Risk_Experiment <- replace(s$Risk_Experiment,s$Risk_Experiment==3, "Very risky")
s$Risk_Experiment <- replace(s$Risk_Experiment,s$Risk_Experiment==2, "Risky")
s$Risk_Experiment <- replace(s$Risk_Experiment,s$Risk_Experiment==1, "A little risky")
s$Risk_Experiment <- replace(s$Risk_Experiment,s$Risk_Experiment==0, "Not at all risky")
s$Risk_Experiment <- factor(s$Risk_Experiment, levels=c("Very risky","Risky","A little risky","Not at all risky"))

### Create vectors for variables

outcome.probit.risk <- paste0("Risk_Experiment","~")
covs.reg.probit <- "female+age+education+employees.paid+relig.christ+market.agege+market.bariga+market.mushin"
variables.probit <- paste0("treat.anon*treat.social*police.coethnic+", covs.reg.probit)

### Run probit regressions (all respondents)

m1_all <- polr(paste(outcome.probit.risk, variables.probit), 
               method="probit", data = s, Hess=TRUE); summary(m1_all)
ctable <- coef(summary(m1_all))
p <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2 
ctable_all <- cbind(ctable, "p value" = p)

### Predicted Probabilities 

pred_risk <- function(df_name,model_name,intervention_name,var_name,respondents_name,anon,social,coethnic){
  newdat <- with(df_name,data.frame(
    treat.anon=anon,
    treat.social=social,
    police.coethnic=coethnic,
    female=mean(female),
    age=mean(age,na.rm=T),
    education=mean(education,na.rm=T),
    employees.paid=mean(employees.paid),
    relig.christ=mean(relig.christ),
    market.agege=mean(market.agege),
    market.bariga=mean(market.bariga),
    market.mushin=mean(market.mushin)
  ))
  newdat <- cbind(newdat, predict(model_name, newdat, type = "probs", se.fit=TRUE))
  probs <- marginaleffects::predictions(model_name,newdata = newdat, type = "probs")
  probs <- cbind(rep(c("Treatment","Control"),4),probs[,c(2:4)])
  colnames(probs) <- c("Status","Level","Probability","SE")
  probs$Prob.Label <- paste0(round(probs$Probability*100,0),"%")
  probs$Status <- factor(probs$Status,levels=c("Treatment","Control"))
  probs$Intervention <- intervention_name
  probs$Respondents <- respondents_name
  return(probs)
}
pred_anon_all <- pred_risk(df_name=s,model_name=m1_all,anon=c(1,0),social=0,coethnic=0,var_name="treat.anon",intervention_name="Anonymity",respondents_name="All")

### Create dataframe with predictions

pred_anon_all$Level <- factor(pred_anon_all$Level,
   levels=rev(c("Not at all risky","A little risky","Risky","Very risky")))
ct <- data.frame(treat.anon=t(round(ctable_all[1,1:2],3)))
names(ct) <- c("value","se")
ct$se <- paste0(paste0("(",ct$se),")")
ct$value <- paste0(ct$value,c("***"))
ct$coefs <- paste(ct$value,ct$se)
ct$coefs <- paste("Coef:",ct$coefs)
pred_anon_all$Coefficients <- c(ct$coefs, rep(NA,7))
pred_anon_all$Level <- factor(pred_anon_all$Level, levels=rev(levels(pred_anon_all$Level)))

### Create plot

plot1 <- ggplot(pred_anon_all,aes(x=Probability,y=Level,color=Status))+
  geom_point(size=2, position = position_dodge(width = -1/2)) + 
  geom_linerange(aes(y = Level, xmin = Probability - SE*interval2,
                xmax = Probability + SE*interval2),
                 lwd = 1,position = position_dodge(width = -1/2),
                 show.legend = F)+
  geom_label_repel(data=pred_anon_all, aes(y=Level, x=Probability, label = Prob.Label,color=Status), 
                   inherit.aes=T, size=3, na.rm = FALSE,show.legend = FALSE,
                   position = position_dodge(width = -1/2),
                   segment.color = NA)+
  scale_x_continuous(labels=percent_format(accuracy=1), breaks = seq(0,1,by=.1), limits = c(0,.7)) +
  geom_text(data=pred_anon_all,aes(x=.525,y=4.25,label=Coefficients),size=3)+
  labs(y="Perceived Risk", x="Probability of Perceived Risk")+
  theme_few()+theme(legend.position = "bottom",legend.title = element_blank(),
                    panel.grid.major.y = element_line(colour = "black",size=.2)
  )+scale_colour_grey(start = 0,end = .7); plot1

#### ONLINE APPENDIX ####

#### + Table 1: Market Areas ####

# Calculate Ethnolinguistic Fractionalization score
ms <- ddply(s,"market",summarise,
            yoruba=mean(ethnicity.yoruba,na.rm=T)*100,
            igbo=mean(ethnicity.igbo,na.rm=T)*100,
            hf=mean(ethnicity.hf,na.rm=T)*100,
            other=mean(ethnicity.other,na.rm=T)*100); ms
ms.melt <- melt(ms,"market")
ms$elf <- NA
for(i in 1:nrow(ms)){
  ms.melt.samp <- subset(ms.melt,market==unique(ms.melt$market)[i])
  ms$elf[i] <- 1 - sum((ms.melt.samp$value/100)^2)
  }

# Calculate Total Shops by Market
market.total <- ddply(mc,"market",summarise,
            shops.total=sum(shops.total,na.rm=T))

# Merge ELF score
mtab <- merge(market.total,ms[,c("market","elf")],by="market")

# Merge witness variable
mtab <- merge(mtab,
              ddply(s,"market",summarise,witness.mean=mean(witness,na.rm=T)*100),
              by="market")

# Round numerical outputs
mtab <- mtab[,c("market","witness.mean","shops.total","elf")]
mtab$witness.mean <- round(mtab$witness.mean,1)
mtab$elf <- round(mtab$elf,2)
colnames(mtab) <- c("Market","Witness Fight (%)","Total Shops","Ethnic Diversity")
print(mtab)

#### + Table 3: Survey Respondent Demographics ####

### Transform covariate variables into character vector for summary table

# Gender
s$Gender <- ifelse(s$female==1,"Female","Male")
s$Gender <- factor(s$Gender, 
                   c("Female","Male"))

# Age
s$Age <- s$age
s$Age <- replace(s$Age, s$Age==1|s$Age==2, "18-30")
s$Age <- replace(s$Age, s$Age==3, "31-40")
s$Age <- replace(s$Age, s$Age==4, "41-50")
s$Age <- replace(s$Age, s$Age==5, "51-60")
s$Age <- replace(s$Age, s$Age==6|s$Age==7, "Over 60")
s$Age <- factor(s$Age, c("18-30","31-40","41-50",
                         "51-60","Over 60"))

# Education
s$Education <- s$education
s$Education <- replace(s$Education, s$Education==1, "Primary or less")
s$Education <- replace(s$Education, s$Education==2, "Secondary")
s$Education <- replace(s$Education, s$Education==3, "Vocational")
s$Education <- replace(s$Education, s$Education==4, "University")
s$Education <- replace(s$Education, s$Education==5, "Post-graduate")
s$Education <- factor(s$Education, c("Primary or less",
                                     "Secondary","Vocational",
                                     "University","Post-graduate"))

# Paid Employees 
s$Employees <- s$employees.paid
s$Employees <- replace(s$Employees, s$Employees>=4, "Over 3")
s$Employees <- factor(s$Employees, c("0","1","2","3","Over 3"))

# Religion
s$Religion <- factor(s$religion,c("Christian","Muslim","Other"))

# Ethnicity
s$Ethnicity <- factor(s$ethnicity,c("Yoruba","Igbo","Other"))

# Market
s$Market <- factor(s$market,c("Agege","Bariga","Mushin","Oshodi"))

# Print table (latex output)
sl <- s[,c("Gender","Age","Education","Employees","Religion","Ethnicity","Market")]
tableNominal(sl, cap="Survey Respondent Summary Statistics")

#### + Figure 3: Association with Amount of Information in Witness Scenario ####

# Remove NAs
sa <- s[,c("retaliation10", "coop.others10","Cooperation_Will")]
sa <- sa[complete.cases(sa),]; nrow(sa)

# Retaliation
plot1 <- ggplot(sa, aes(x = retaliation10/10, y = Cooperation_Will)) +
  geom_boxplot(size = .75,alpha=.75) +
  geom_jitter(alpha = .25,size=1) +
  scale_x_continuous(labels=percent_format(accuracy=1), breaks = seq(0,1,by=.2), limits = c(0,1)) +
  labs(x="Perceived Retaliation Likelihood",y="Amount of Information")+
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))+theme_few(); plot1

# Cooperation Norms
plot2 <- ggplot(sa, aes(x = coop.others10/10, y = Cooperation_Will)) +
  geom_boxplot(size = .75,alpha=.75) +
  geom_jitter(alpha = .25,size=1) +
  scale_x_continuous(labels=percent_format(accuracy=1), breaks = seq(0,1,by=.2), limits = c(0,1)) +
  labs(x="Perceived Proporiton of Community Willing to Cooperate",y="Amount of Information")+
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))+theme_few(); plot2

#### + Figure 4: Correlation between Perceived Retaliation Likelihood and Perceived Proportion of Community Cooperation ####

# Calculate correlation
corr <- s[,c("retaliation10","coop.others10")]
corr <- corr %>% na.omit
cor.test(corr$retaliation10, y=corr$coop.others10, method = c("pearson"))

# Transform variables to percentage
s$retaliation10_per <- s$retaliation10/10
s$coop.others10_per <- s$coop.others10/10

# Create plot
plot1 <- ggplot(s,aes(x=jitter(retaliation10_per),y=jitter(coop.others10_per)))+
  geom_point(alpha=.1)+geom_smooth(color="black",se=T)+theme_minimal()+
  scale_x_continuous(labels=percent_format(accuracy = 1), breaks = seq(-1,1,by=.2))+
  scale_y_continuous(labels=percent_format(accuracy = 1), breaks = seq(-1,1,by=.2))+
  annotate("text", x=.7, y=0.9,label="Pearson's correlation\n-0.30 [95% CI: -0.35, -0.24]",size=3.5) +
  labs(x="Perceived Retaliation Likelihood", y="Perceived Proportion of Community Cooperation"); plot1

#### + Table 4: Ordered Probit of Amount of Information Predicted to Share ####

### Run probit models

# Model 1 (Retaliation & Cooperation Norms)
m1 <- polr(paste("Cooperation_Will ~", paste0(covs.main,covs.eth.regFX)), 
           method="probit", data = s, Hess=TRUE); summary(m1); nobs(m1)
# Model 2 (Retaliation)
m2 <- polr(paste("Cooperation_Will ~", paste0("retaliation10+",covs.eth.regFX)), 
           method="probit", data = s, Hess=TRUE); summary(m2); nobs(m2)
# Model 3 (Cooperation Norms)
m3 <- polr(paste("Cooperation_Will ~", paste0("coop.others10+",covs.eth.regFX)), 
           method="probit", data = s, Hess=TRUE); summary(m3); nobs(m3)

### Create table

covar.names.demo <- c(
  "Retaliation","Cooperation Norms",
  "Female","Age","Education","Employees",
  "Christian","Yoruba","Ineffectiveness","Mistreatment","Market: Agege",
  "Market: Bariga","Market: Mushin",
  "Constant")
stargazer(m1,m2,m3,type="text",
          font.size = "tiny",
          digits = 2,
          dep.var.labels=c("Change in Info Amount"),
          covariate.labels=covar.names.demo,
          title = "Ordered Probit of Amount of  Information Willing to Share"
)

#### + Figure 5: Amount of Information Respondents Willing to Share ####

### Create datasets for Yoruba and Non-Yoruba respondents

# Yorauba
sy <- subset(s, ethnicity.yoruba==1)

# Non-Yoruba
sny <- subset(s, ethnicity.yoruba==0)

### Create dataframe

tab0 <- data.frame(table(s$coop.will)/sum(table(s$coop.will)))
tab1 <- data.frame(table(sy$coop.will)/sum(table(sy$coop.will)))
tab2 <- data.frame(table(sny$coop.will)/sum(table(sny$coop.will)))
tab <- rbind(tab0, tab1, tab2)
tab$Ethnicity <- c(rep("All",4),rep("Yoruba",4),rep("Non-Yoruba",4))
names(tab)[1:2] <- c("Cooperation","Freq")
tab$Ethnicity <- factor(tab$Ethnicity,levels = c("Non-Yoruba","Yoruba","All"))
tab$Cooperation <- rep(c("None","A little","Some","Everything"),3)
tab$Cooperation <- factor(tab$Cooperation,
                          levels=rev(c("None","A little","Some","Everything")))
pos <- ddply(tab, "Ethnicity", transform, 
             position = (cumsum(Freq)-Freq) + (Freq/2));pos

# Create plot
plot1 <- ggplot(pos,aes(x = Ethnicity, y = Freq, fill = Cooperation)) + 
  geom_bar(stat = "identity") +
  scale_y_continuous(labels=percent, 
                     breaks = seq(0,1,by=.2), limits = c(0,1)) +
  coord_flip() + theme_few() +
  theme(legend.position="bottom") +
  ylab("Percent of Respondents") + xlab("") +
  labs(fill="") + # remove legend title
  scale_fill_grey(end=0.8, start=0.2) + 
  geom_text(aes(label = paste0(round(Freq*100,0),"%"),y=position),
            fontface='bold',color="white",size = 3)+
  guides(fill = guide_legend(reverse = T)); plot1

#### + Figure 6: Respondent Perception of Cooperation Norms ####

s.coop <- s[!is.na(s$coop.others10),]
plot1 <- ggplot(s.coop,aes(x=factor(paste0(coop.others10*10,"%"))))+
  geom_bar(aes(y=..count../sum(..count..)))+
  labs(x="Perceived Percent of Community Willing to Cooperate",
       y="Proportion of Respondents")+theme_few(); plot1

#### + Table 5: Prior Cooperation Summary Statistics ####

### Transform cooperation prior variable to character vector

s$Cooperation_Prior <- s$coop.prior
s$Cooperation_Prior <- replace(s$Cooperation_Prior, s$Cooperation_Prior==0, "None")
s$Cooperation_Prior <- replace(s$Cooperation_Prior, s$Cooperation_Prior==1, "A little")
s$Cooperation_Prior <- replace(s$Cooperation_Prior, s$Cooperation_Prior==2, "Some")
s$Cooperation_Prior <- replace(s$Cooperation_Prior, s$Cooperation_Prior==3, "Everything")
s$Cooperation_Prior <- factor(s$Cooperation_Prior, levels = c("Everything","Some","A little","None"))

### Print table

sw <- subset(s,witness==1)
sl <- data.frame(Cooperation_Prior=sw[,c("Cooperation_Prior")])
tableNominal(sl, cap="Prior Cooperation Summary Statistics")

#### + Table 6: Experiment Covariate Balance Table ####

### Create dataframe for table

covs.eth <- c("female","age","education","employees.paid",
              "relig.christ","ethnicity.yoruba","ineffectiveness","mistreatment")
covs.tab <- s[,c(covs.eth,"treat.anon","treat.social","police.coethnic")]
names <- c("Female","Age","Education","Employees","Christian","Yoruba")
levels <- c(length(unique(covs.tab$female)),
            length(unique(covs.tab$age)),
            length(unique(covs.tab$education)),
            length(unique(covs.tab$employees.paid)),
            length(unique(covs.tab$relig.christ)),
            length(unique(covs.tab$e)))

### Calculate balances

# Anonymity
mean.ac <- c(mean(covs.tab[which(covs.tab$treat.anon==0),covs.eth[1]],na.rm=T),
             mean(covs.tab[which(covs.tab$treat.anon==0),covs.eth[2]],na.rm=T),
             mean(covs.tab[which(covs.tab$treat.anon==0),covs.eth[3]],na.rm=T),
             mean(covs.tab[which(covs.tab$treat.anon==0),covs.eth[4]],na.rm=T),
             mean(covs.tab[which(covs.tab$treat.anon==0),covs.eth[5]],na.rm=T),
             mean(covs.tab[which(covs.tab$treat.anon==0),covs.eth[6]],na.rm=T))
se.ac <- c(std(covs.tab[which(covs.tab$treat.anon==0),covs.eth[1]]),
           std(covs.tab[which(covs.tab$treat.anon==0),covs.eth[2]]),
           std(covs.tab[which(covs.tab$treat.anon==0),covs.eth[3]]),
           std(covs.tab[which(covs.tab$treat.anon==0),covs.eth[4]]),
           std(covs.tab[which(covs.tab$treat.anon==0),covs.eth[5]]),
           std(covs.tab[which(covs.tab$treat.anon==0),covs.eth[6]]))
mean.a <- c(mean(covs.tab[which(covs.tab$treat.anon==1),covs.eth[1]],na.rm=T),
            mean(covs.tab[which(covs.tab$treat.anon==1),covs.eth[2]],na.rm=T),
            mean(covs.tab[which(covs.tab$treat.anon==1),covs.eth[3]],na.rm=T),
            mean(covs.tab[which(covs.tab$treat.anon==1),covs.eth[4]],na.rm=T),
            mean(covs.tab[which(covs.tab$treat.anon==1),covs.eth[5]],na.rm=T),
            mean(covs.tab[which(covs.tab$treat.anon==1),covs.eth[6]],na.rm=T))
se.a <- c(std(covs.tab[which(covs.tab$treat.anon==1),covs.eth[1]]),
          std(covs.tab[which(covs.tab$treat.anon==1),covs.eth[2]]),
          std(covs.tab[which(covs.tab$treat.anon==1),covs.eth[3]]),
          std(covs.tab[which(covs.tab$treat.anon==1),covs.eth[4]]),
          std(covs.tab[which(covs.tab$treat.anon==1),covs.eth[5]]),
          std(covs.tab[which(covs.tab$treat.anon==1),covs.eth[6]]))
ks.a <- c(ks.test(covs.tab[which(covs.tab$treat.anon==1),covs.eth[1]], 
                  covs.tab[which(covs.tab$treat.anon==0),covs.eth[1]],
                  alternative = c("two.sided"))$p.value,
          ks.test(covs.tab[which(covs.tab$treat.anon==1),covs.eth[2]], 
                  covs.tab[which(covs.tab$treat.anon==0),covs.eth[2]],
                  alternative = c("two.sided"))$p.value,
          ks.test(covs.tab[which(covs.tab$treat.anon==1),covs.eth[3]], 
                  covs.tab[which(covs.tab$treat.anon==0),covs.eth[3]],
                  alternative = c("two.sided"))$p.value,
          ks.test(covs.tab[which(covs.tab$treat.anon==1),covs.eth[4]], 
                  covs.tab[which(covs.tab$treat.anon==0),covs.eth[4]],
                  alternative = c("two.sided"))$p.value,
          ks.test(covs.tab[which(covs.tab$treat.anon==1),covs.eth[5]], 
                  covs.tab[which(covs.tab$treat.anon==0),covs.eth[5]],
                  alternative = c("two.sided"))$p.value,
          ks.test(covs.tab[which(covs.tab$treat.anon==1),covs.eth[6]], 
                  covs.tab[which(covs.tab$treat.anon==0),covs.eth[6]],
                  alternative = c("two.sided"))$p.value)

# Cooperation Awareness
mean.sc <- c(mean(covs.tab[which(covs.tab$treat.social==0),covs.eth[1]],na.rm=T),
             mean(covs.tab[which(covs.tab$treat.social==0),covs.eth[2]],na.rm=T),
             mean(covs.tab[which(covs.tab$treat.social==0),covs.eth[3]],na.rm=T),
             mean(covs.tab[which(covs.tab$treat.social==0),covs.eth[4]],na.rm=T),
             mean(covs.tab[which(covs.tab$treat.social==0),covs.eth[5]],na.rm=T),
             mean(covs.tab[which(covs.tab$treat.social==0),covs.eth[6]],na.rm=T))
se.sc <- c(std(covs.tab[which(covs.tab$treat.social==0),covs.eth[1]]),
           std(covs.tab[which(covs.tab$treat.social==0),covs.eth[2]]),
           std(covs.tab[which(covs.tab$treat.social==0),covs.eth[3]]),
           std(covs.tab[which(covs.tab$treat.social==0),covs.eth[4]]),
           std(covs.tab[which(covs.tab$treat.social==0),covs.eth[5]]),
           std(covs.tab[which(covs.tab$treat.social==0),covs.eth[6]]))
mean.s <- c(mean(covs.tab[which(covs.tab$treat.social==1),covs.eth[1]],na.rm=T),
            mean(covs.tab[which(covs.tab$treat.social==1),covs.eth[2]],na.rm=T),
            mean(covs.tab[which(covs.tab$treat.social==1),covs.eth[3]],na.rm=T),
            mean(covs.tab[which(covs.tab$treat.social==1),covs.eth[4]],na.rm=T),
            mean(covs.tab[which(covs.tab$treat.social==1),covs.eth[5]],na.rm=T),
            mean(covs.tab[which(covs.tab$treat.social==1),covs.eth[6]],na.rm=T))
se.s <- c(std(covs.tab[which(covs.tab$treat.social==1),covs.eth[1]]),
          std(covs.tab[which(covs.tab$treat.social==1),covs.eth[2]]),
          std(covs.tab[which(covs.tab$treat.social==1),covs.eth[3]]),
          std(covs.tab[which(covs.tab$treat.social==1),covs.eth[4]]),
          std(covs.tab[which(covs.tab$treat.social==1),covs.eth[5]]),
          std(covs.tab[which(covs.tab$treat.social==1),covs.eth[6]]))
ks.s <- c(ks.test(covs.tab[which(covs.tab$treat.social==1),covs.eth[1]], 
                  covs.tab[which(covs.tab$treat.social==0),covs.eth[1]],
                  alternative = c("two.sided"))$p.value,
          ks.test(covs.tab[which(covs.tab$treat.social==1),covs.eth[2]], 
                  covs.tab[which(covs.tab$treat.social==0),covs.eth[2]],
                  alternative = c("two.sided"))$p.value,
          ks.test(covs.tab[which(covs.tab$treat.social==1),covs.eth[3]], 
                  covs.tab[which(covs.tab$treat.social==0),covs.eth[3]],
                  alternative = c("two.sided"))$p.value,
          ks.test(covs.tab[which(covs.tab$treat.social==1),covs.eth[4]], 
                  covs.tab[which(covs.tab$treat.social==0),covs.eth[4]],
                  alternative = c("two.sided"))$p.value,
          ks.test(covs.tab[which(covs.tab$treat.social==1),covs.eth[5]], 
                  covs.tab[which(covs.tab$treat.social==0),covs.eth[5]],
                  alternative = c("two.sided"))$p.value,
          ks.test(covs.tab[which(covs.tab$treat.social==1),covs.eth[6]], 
                  covs.tab[which(covs.tab$treat.social==0),covs.eth[6]],
                  alternative = c("two.sided"))$p.value)

# Police co-ethnicity
mean.ec <- c(mean(covs.tab[which(covs.tab$police.coethnic==0),covs.eth[1]],na.rm=T),
             mean(covs.tab[which(covs.tab$police.coethnic==0),covs.eth[2]],na.rm=T),
             mean(covs.tab[which(covs.tab$police.coethnic==0),covs.eth[3]],na.rm=T),
             mean(covs.tab[which(covs.tab$police.coethnic==0),covs.eth[4]],na.rm=T),
             mean(covs.tab[which(covs.tab$police.coethnic==0),covs.eth[5]],na.rm=T),
             mean(covs.tab[which(covs.tab$police.coethnic==0),covs.eth[6]],na.rm=T))
se.ec <- c(std(covs.tab[which(covs.tab$police.coethnic==0),covs.eth[1]]),
           std(covs.tab[which(covs.tab$police.coethnic==0),covs.eth[2]]),
           std(covs.tab[which(covs.tab$police.coethnic==0),covs.eth[3]]),
           std(covs.tab[which(covs.tab$police.coethnic==0),covs.eth[4]]),
           std(covs.tab[which(covs.tab$police.coethnic==0),covs.eth[5]]),
           std(covs.tab[which(covs.tab$police.coethnic==0),covs.eth[6]]))
mean.e <- c(mean(covs.tab[which(covs.tab$police.coethnic==1),covs.eth[1]],na.rm=T),
            mean(covs.tab[which(covs.tab$police.coethnic==1),covs.eth[2]],na.rm=T),
            mean(covs.tab[which(covs.tab$police.coethnic==1),covs.eth[3]],na.rm=T),
            mean(covs.tab[which(covs.tab$police.coethnic==1),covs.eth[4]],na.rm=T),
            mean(covs.tab[which(covs.tab$police.coethnic==1),covs.eth[5]],na.rm=T),
            mean(covs.tab[which(covs.tab$police.coethnic==1),covs.eth[6]],na.rm=T))
se.e <- c(std(covs.tab[which(covs.tab$police.coethnic==1),covs.eth[1]]),
          std(covs.tab[which(covs.tab$police.coethnic==1),covs.eth[2]]),
          std(covs.tab[which(covs.tab$police.coethnic==1),covs.eth[3]]),
          std(covs.tab[which(covs.tab$police.coethnic==1),covs.eth[4]]),
          std(covs.tab[which(covs.tab$police.coethnic==1),covs.eth[5]]),
          std(covs.tab[which(covs.tab$police.coethnic==1),covs.eth[6]]))
ks.e <- c(ks.test(covs.tab[which(covs.tab$police.coethnic==1),covs.eth[1]], 
                  covs.tab[which(covs.tab$police.coethnic==0),covs.eth[1]],
                  alternative = c("two.sided"))$p.value,
          ks.test(covs.tab[which(covs.tab$police.coethnic==1),covs.eth[2]], 
                  covs.tab[which(covs.tab$police.coethnic==0),covs.eth[2]],
                  alternative = c("two.sided"))$p.value,
          ks.test(covs.tab[which(covs.tab$police.coethnic==1),covs.eth[3]], 
                  covs.tab[which(covs.tab$police.coethnic==0),covs.eth[3]],
                  alternative = c("two.sided"))$p.value,
          ks.test(covs.tab[which(covs.tab$police.coethnic==1),covs.eth[4]], 
                  covs.tab[which(covs.tab$police.coethnic==0),covs.eth[4]],
                  alternative = c("two.sided"))$p.value,
          ks.test(covs.tab[which(covs.tab$police.coethnic==1),covs.eth[5]], 
                  covs.tab[which(covs.tab$police.coethnic==0),covs.eth[5]],
                  alternative = c("two.sided"))$p.value,
          ks.test(covs.tab[which(covs.tab$police.coethnic==1),covs.eth[6]], 
                  covs.tab[which(covs.tab$police.coethnic==0),covs.eth[6]],
                  alternative = c("two.sided"))$p.value)

### Combine results

bal.tab <- data.frame(rbind(cbind(mean.a,se.a, mean.ac,se.ac, ks.a),
                            cbind(mean.s,se.s, mean.sc,se.sc, ks.s), 
                            cbind(mean.e,se.e, mean.ec, se.ec,ks.e)))
bal.tab <- cbind(rep(names,3),bal.tab)
bal.tab <- cbind(c("Anonymity",rep(NA,5),
                   "Coop. Awareness",rep(NA,5),
                   "Police Co-Ethnic",rep(NA,5)),bal.tab)
colnames(bal.tab) <- c(" ","Variable","Mean_Treat","SE_Treat","Mean_Control","SE_Control","K-S P-val")
bal.tab

#### + Figure 9: Information-sharing Summary Statistics ####

### Create dataframe

tab0 <- data.frame(table(s$exp.coop)/sum(table(s$exp.coop)))
tab1 <- data.frame(table(sy$exp.coop)/sum(table(sy$exp.coop)))
tab2 <- data.frame(table(sny$exp.coop)/sum(table(sny$exp.coop)))
tab <- rbind(tab0, tab1,tab2)
tab$Ethnicity <- c(rep("All",4),rep("Yoruba",4),rep("Non-Yoruba",4))
names(tab)[1:2] <- c("Cooperation","Freq")
tab$Ethnicity <- factor(tab$Ethnicity,levels = c("Non-Yoruba","Yoruba","All"))
tab$Cooperation <- rep(c("None","A little","Some","Everything"),3)
tab$Cooperation <- factor(tab$Cooperation,levels=rev(c("None","A little","Some","Everything")))
pos <- ddply(tab, "Ethnicity", transform, position = (cumsum(Freq)-Freq) + (Freq/2));pos

### Create plot

plot1 <- ggplot(pos,aes(x = Ethnicity, y = Freq, fill = Cooperation)) + 
  geom_bar(stat = "identity") +
  scale_y_continuous(labels=percent, 
                     breaks = seq(0,1,by=.2), limits = c(0,1)) +
  coord_flip() + theme_few() +
  theme(legend.position="bottom") +
  ylab("Percent of Respondents") + xlab("") +
  labs(fill="") +
  scale_fill_grey(end=0.8, start=0.2) + 
  geom_text(aes(label = paste0(round(Freq*100,0),"%"),y=position),
            fontface='bold',color="white",size = 3)+
  guides(fill = guide_legend(reverse = T)); plot1

#### + Table 7: Treatment Effect on Information-sharing (Probit) ####

### Transform outcome variable to characters

s$Coop_Will_Experiment <- s$exp.coop
s$Coop_Will_Experiment <- replace(s$Coop_Will_Experiment, s$Coop_Will_Experiment==0, "Nothing")
s$Coop_Will_Experiment <- replace(s$Coop_Will_Experiment, s$Coop_Will_Experiment==1, "A little")
s$Coop_Will_Experiment <- replace(s$Coop_Will_Experiment, s$Coop_Will_Experiment==2, "Some")
s$Coop_Will_Experiment <- replace(s$Coop_Will_Experiment, s$Coop_Will_Experiment==3, "Everything")
s$Coop_Will_Experiment <- factor(s$Coop_Will_Experiment, levels=rev(c("Everything","Some","A little","Nothing")))

### Create variable vectors

outcome.probit <- paste0("Coop_Will_Experiment","~")
covs.reg.probit <- "female+age+education+employees.paid+relig.christ+market.agege+market.bariga+market.mushin"
variables.probit <- paste0("treat.anon*treat.social*police.coethnic+", covs.reg.probit)

### Run probit regressions

# All Respondents
m1_all <- polr(paste(outcome.probit, variables.probit), 
               method="probit", data = s, Hess=TRUE); summary(m1_all)
ctable <- coef(summary(m1_all))
p <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2 # p-values
ctable_all <- cbind(ctable, "p value" = p)

# Yoruba 
m1_y <- polr(paste(outcome.probit, variables.probit), method="probit",data = subset(s,ethnicity.yoruba==1), Hess=TRUE); summary(m1_y)
ctable <- coef(summary(m1_y))
p <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2 
ctable_y <- cbind(ctable, "p value" = p)

# Non-Yoruba
m1_ny <- polr(paste(outcome.probit, variables.probit), method="probit",data = subset(s,ethnicity.yoruba==0), Hess=TRUE); summary(m1_ny)
ctable <- coef(summary(m1_ny))
p <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2 
ctable_ny <- cbind(ctable, "p value" = p)

### Calculate predicted probabilities 

pred <- function(df_name,model_name, intervention_name, var_name, respondents_name, anon, social,coethnic){
  require(reshape2)
  newdat <- with(df_name,data.frame(
    treat.anon=anon,
    treat.social=social,
    police.coethnic=coethnic,
    female=mean(female),
    age=mean(age,na.rm=T),
    education=mean(education,na.rm=T),
    employees.paid=mean(employees.paid),
    relig.christ=mean(relig.christ),
    market.agege=mean(market.agege),
    market.bariga=mean(market.bariga),
    market.mushin=mean(market.mushin)
  ))
  newdat <- cbind(newdat, predict(model_name, newdat, type = "probs", se.fit=TRUE))
  probs <- marginaleffects::predictions(model_name,newdata = newdat, type = "probs")
  probs <- cbind(rep(c("Treatment","Control"),4),probs[,c(2:4)])
  colnames(probs) <- c("Status","Level","Probability","SE")
  probs$Prob.Label <- paste0(round(probs$Probability*100,0),"%")
  probs$Status <- factor(probs$Status,levels=c("Treatment","Control"))
  probs$Intervention <- intervention_name
  probs$Respondents <- respondents_name
  return(probs)
}
pred_anon_all <- pred(df_name=s,model_name=m1_all,anon=c(1,0),social=0,coethnic=0,var_name="treat.anon",intervention_name="Anonymity",respondents_name="All")
pred_anon_y <- pred(df_name=subset(s,ethnicity.yoruba==1),model_name=m1_y,anon=c(1,0),social=0,coethnic=0,var_name="treat.anon",intervention_name="Anonymity",respondents_name="Yoruba")
pred_anon_ny <- pred(df_name=subset(s,ethnicity.yoruba==0),model_name=m1_ny,anon=c(1,0),social=0,coethnic=0,var_name="treat.anon",intervention_name="Anonymity",respondents_name="Non-Yoruba")
pred_social_all <- pred(df_name=s,model_name=m1_all,anon=0,social=c(1,0),coethnic=0,var_name="treat.social",intervention_name="Awareness",respondents_name="All")
pred_social_y <- pred(df_name=subset(s,ethnicity.yoruba==1),model_name=m1_y,anon=0,social=c(1,0),coethnic=0,var_name="treat.social",intervention_name="Awareness",respondents_name="Yoruba")
pred_social_ny <- pred(df_name=subset(s,ethnicity.yoruba==0),model_name=m1_ny,anon=0,social=c(1,0),coethnic=0,var_name="treat.social",intervention_name="Awareness",respondents_name="Non-Yoruba")
pred_coethnic_all <- pred(df_name=s,model_name=m1_all,anon=0,social=0,coethnic=c(1,0),var_name="police.coethnic",intervention_name="Co-ethnic",respondents_name="All")
pred_coethnic_y <- pred(df_name=subset(s,ethnicity.yoruba==1),model_name=m1_y,anon=0,social=0,coethnic=c(1,0),var_name="police.coethnic",intervention_name="Co-ethnic",respondents_name="Yoruba")
pred_coethnic_ny <- pred(df_name=subset(s,ethnicity.yoruba==0),model_name=m1_ny,anon=0,social=0,coethnic=c(1,0),var_name="police.coethnic",intervention_name="Co-ethnic",respondents_name="Non-Yoruba")

### Combine results into dataframe

pred_all <- rbind(
  pred_anon_all,pred_anon_y,pred_anon_ny,
  pred_social_all,pred_social_y,pred_social_ny,
  pred_coethnic_all,pred_coethnic_y,pred_coethnic_ny
)
pred_all$Respondents[pred_all$Respondents=="All"] <- "All Respondents"
pred_all$Respondents <- factor(pred_all$Respondents,
                               levels = c("All Respondents","Yoruba","Non-Yoruba"))
pred_all$Level <- factor(pred_all$Level,
                         levels=rev(c("Everything","Some","A little","Nothing")))
ct <- data.frame(round(rbind(ctable_all[1:2,1:2],
                             ctable_y[1:2,1:2],
                             ctable_ny[1:2,1:2]),3))
ct <- ct[c(1,3,5,2,4,6),]
names(ct) <- c("value","se")
ct$se <- paste0(paste0("(",ct$se),")")
ct$value <- paste0(ct$value,c("***","","***","*","",""))
ct$coefs <- paste(ct$value,ct$se)
ct$coefs <- paste("Coef:",ct$coefs)
coef.vals <- ct$coefs
labs <- data.frame(
  Status="Treatment",Level="Everything",
  Intervention=c(rep("Anonymity",3),rep("Awareness",3)),
  Respondents=rep(c("All Respondents","Yoruba","Non-Yoruba"),2),
  Coefficients=coef.vals
)
pred_all <- merge(pred_all,labs,all.x=T,by=c("Intervention","Respondents","Status","Level"))

### Create table

covar.names.demo <- c("Anonymity", "Cooperation Awareness",
                      "In-group Police","Female",
                      "Age","Education","Employees","Christian",
                      "Market: Agege","Market: Bariga","Market: Mushin",
                      "Anonymity x Awareness","Anonymity x In-group","Awareness x In-group","Anon. x Aware. x In-group")
model.names <-  c("\\hspace{.7cm} (All) \\hspace{.7cm}","\\hspace{.3cm} (Yoruba) \\hspace{.3cm}","(Non-Yoruba)")
stargazer(m1_all,m1_y,m1_ny, type="text",
          label = "tab:probit_exp",
          column.sep.width = "5pt",
          font.size = "scriptsize",
          model.numbers=FALSE,
          digits = 2,
          dep.var.labels=paste("Change in Info Amount"),
          column.labels=model.names,
          covariate.labels=covar.names.demo,
          title = "Treatment Effect on Information-sharing (Probit)" 
)

#### + Table 8: Treatment Effect on Information-sharing ####

### Create variable vectors

covs <- c("female","age","education","employees.paid","relig.christ")
covs.reg <- paste(covs,collapse="+")
covsFX <- c(covs,"market.agege","market.bariga","market.mushin") 
covs.regFX <- paste0(covs.reg,"+market.agege+market.bariga+market.mushin") 

### Transform outcome to logged variable

s$exp.coop.log <- log(s$exp.coop+1)
sy$exp.coop.log <- log(sy$exp.coop+1)
sny$exp.coop.log <- log(sny$exp.coop+1)

### Run regression models

outcome <- paste0("exp.coop.log","~")
variables <- paste0("treat.anon*treat.social*police.coethnic+",covs.regFX)
out <- lm(paste0(outcome, variables),s)
out.sum <- out
out <- coeftest(out, vcov = vcovHC(out, type="HC1"))
out.y <- lm(paste0(outcome, variables),subset(s,ethnicity.yoruba==1))
out.y.sum <- out.y
out.y <- coeftest(out.y, vcov = vcovHC(out.y, type="HC1"))
out.ny <- lm(paste0(outcome, variables),subset(s,ethnicity.yoruba==0))
out.ny.sum <- out.ny
out.ny <- coeftest(out.ny, vcov = vcovHC(out.ny, type="HC1"))

### Create table

model.names <- c("(All)","(Yoruba)","(Non-Yoruba)")
covar.names.demo <- c("Anonymity", "Cooperation Awareness",
                      "In-group Police","Female",
                      "Age","Education","Employees","Christian",
                      "Market: Agege","Market: Bariga","Market: Mushin",
                      "Anonymity x Awareness","Anonymity x In-group","Awareness x In-group","Anon. x Aware. x In-group")
stargazer(out,out.y,out.ny,type="text",
          column.sep.width = "5pt",
          se=list(out[,2],out.y[,2],out.ny[,2]),
          font.size = "scriptsize",
          model.numbers=FALSE,
          digits = 2,
          dep.var.labels=paste("Change in Info Amount (log)"),
          column.labels=model.names,
          covariate.labels=covar.names.demo,
          title = "Treatment Effect on Information-sharing"
)
